Autoencoder for Anomaly Detection in Time Series Data¶
In [ ]:
# --- Standard library ---
import os
import time

# Ask TensorFlow not to use the oneDNN optimizations. The flag is placed in the
# environment *before* `import tensorflow` below: setting it only after the
# import may be too late for TensorFlow to honour it.
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

# --- Third-party ---
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.io as pio
import tensorflow as tf

# --- Project helpers ---
# NOTE(review): star imports hide where the helper names used in this notebook
# come from; their relative order is preserved because a later module may
# shadow names exported by an earlier one.
from dataset import *
from plots import *
from metrics import *
from models_funtions import *

# Plot appearance: matplotlib style sheet and plotly renderer used by the notebook.
plt.style.use("Solarize_Light2")
pio.renderers.default = "notebook_connected"
In [ ]:
import os

# Root folders of the two datasets (normal recordings / recordings with collisions)
ROOTDIR_DATASET_NORMAL, ROOTDIR_DATASET_ANOMALY = '../dataset/normal', '../dataset/collisions'

# TF_ENABLE_ONEDNN_OPTS=0 asks TensorFlow not to use the oneDNN library for optimization.
# NOTE(review): the flag presumably has to be in the environment before TensorFlow
# is first imported to have any effect -- confirm the import order in this notebook.
os.environ.update(TF_ENABLE_ONEDNN_OPTS='0')
Various parameters¶
In [ ]:
# Frequency setting, kept as a string because it is also embedded in the
# feature-folder names below. Alternatives left by the author: '1.0', '0.1', '0.005'.
freq = '0.01'

# File-name fragments passed to the loader for the normal / collision recordings
file_name_normal, file_name_collisions = "_20220811_rbtc_", "_collision_20220811_rbtc_"

# Recording ids that make up each set
recording_normal = [0, 2, 3, 4]
recording_collisions = [1, 5]

# Folder-name-friendly form of the frequency, e.g. '0.01' -> '0_01'
freq_str = "_".join(freq.split("."))
features_folder_normal = "./features/normal" + freq_str + "/"
features_folder_collisions = "./features/collisions" + freq_str + "/"
Data¶
In [ ]:
# Normal recordings; the third value returned by get_dataframes is not used here.
df_features_normal, df_normal_raw, _ = get_dataframes(
    ROOTDIR_DATASET_NORMAL,
    file_name_normal,
    recording_normal,
    freq,
    features_folder_normal,
)

# Collision recordings 1 and 5 together (features folder suffix "1_5/").
df_features_collisions, df_collisions_raw, df_collisions_raw_action = get_dataframes(
    ROOTDIR_DATASET_ANOMALY,
    file_name_collisions,
    recording_collisions,
    freq,
    features_folder_collisions + "1_5/",
)

# Collision recording 1 on its own.
df_features_collisions_1, df_collisions_raw_1, df_collisions_raw_action_1 = get_dataframes(
    ROOTDIR_DATASET_ANOMALY,
    file_name_collisions,
    [1],
    freq,
    features_folder_collisions + "1/",
)

# Collision recording 5 on its own.
df_features_collisions_5, df_collisions_raw_5, df_collisions_raw_action_5 = get_dataframes(
    ROOTDIR_DATASET_ANOMALY,
    file_name_collisions,
    [5],
    freq,
    features_folder_collisions + "5/",
)
Loading data. Found 31 different actions. Loading data done. Loading features from file. --- 0.04478168487548828 seconds --- Loading data. Found 31 different actions. Loading data done. Loading features from file. --- 0.027612686157226562 seconds --- Loading data. Found 31 different actions. Loading data done. Loading features from file. --- 0.020544767379760742 seconds --- Loading data. Found 31 different actions. Loading data done. Loading features from file. --- 0.0175931453704834 seconds ---
In [ ]:
# Build train/test sets three times from the same normal features, pairing them
# with: both collision recordings, recording 1 only, and recording 5 only.
X_train, y_train, X_test, y_test, df_test = get_train_test_data(
    df_features_normal,
    df_features_collisions,
    full_normal=True,
)
X_train_1, y_train_1, X_test_1, y_test_1, df_test_1 = get_train_test_data(
    df_features_normal,
    df_features_collisions_1,
    full_normal=True,
)
X_train_5, y_train_5, X_test_5, y_test_5, df_test_5 = get_train_test_data(
    df_features_normal,
    df_features_collisions_5,
    full_normal=True,
)
c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\sklearn\base.py:493: UserWarning: X does not have valid feature names, but VarianceThreshold was fitted with feature names c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\sklearn\base.py:493: UserWarning: X does not have valid feature names, but VarianceThreshold was fitted with feature names c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\sklearn\base.py:493: UserWarning: X does not have valid feature names, but VarianceThreshold was fitted with feature names
Collisions¶
In [ ]:
# Collision annotations for each anomalous recording, loaded separately
collisions_rec1, collisions_init1 = get_collisions('1', ROOTDIR_DATASET_ANOMALY)
collisions_rec5, collisions_init5 = get_collisions('5', ROOTDIR_DATASET_ANOMALY)

# Stack recording 1 and recording 5 into single frames (row indices are kept as
# they are, i.e. not reset by the concatenation)
collisions_rec = pd.concat((collisions_rec1, collisions_rec5))
collisions_init = pd.concat((collisions_init1, collisions_init5))
In [ ]:
# Collision zones and labels for: both recordings, recording 1, recording 5
collisions_zones, y_collisions = get_collisions_zones_and_labels(
    collisions_rec,
    collisions_init,
    df_features_collisions,
)
collisions_zones_1, y_collisions_1 = get_collisions_zones_and_labels(
    collisions_rec1,
    collisions_init1,
    df_features_collisions_1,
)
collisions_zones_5, y_collisions_5 = get_collisions_zones_and_labels(
    collisions_rec5,
    collisions_init5,
    df_features_collisions_5,
)
Autoencoder for Anomaly Detection in Time Series Data¶
In [ ]:
from algorithms.autoencoder import AutoEncoder

# Configure the detector; the arguments are grouped by role, values unchanged.
classifier = AutoEncoder(
    name='AutoEncoder',
    # model
    hidden_size=32,
    sequence_length=5,
    # optimisation
    num_epochs=100,
    batch_size=32,
    lr=1e-3,
    train_gaussian_percentage=0.25,
    # run settings
    seed=42,
    gpu=None,
    details=True,
)

# Fit on X_train only (the training split built from the normal recordings)
classifier.fit(X_train)
print("AutoEncoder training completed.")
0%| | 0/100 [00:00<?, ?it/s]c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\torch\nn\_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead. 100%|██████████| 100/100 [00:35<00:00, 2.79it/s]
AutoEncoder training completed.
c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\torch\nn\_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
Predictions¶
In [ ]:
# Score each test set with the trained classifier using the "mad" threshold.
# NOTE(review): each df_test* is rebound to the value returned by get_statistics,
# so re-running this cell feeds the previous result back in -- confirm that is safe.
df_test = get_statistics(
    X_test, y_collisions, classifier, df_test, freq,
    threshold_type="mad",
)
df_test_1 = get_statistics(
    X_test_1, y_collisions_1, classifier, df_test_1, freq,
    threshold_type="mad",
)
df_test_5 = get_statistics(
    X_test_5, y_collisions_5, classifier, df_test_5, freq,
    threshold_type="mad",
)
Anomaly prediction completed.
Number of anomalies detected: 3 with threshold 127284.064310203, std
Number of anomalies detected: 118 with threshold 277.22257924436053, mad
Number of anomalies detected: 16 with threshold 1788.560854494446, percentile
Number of anomalies detected: 8 with threshold 2471.19164716255, IQR
Number of anomalies detected: 306 with threshold 0.0, zero
choosen threshold type: mad, with value: 277.2226
F1 Score: 0.9327
Accuracy: 0.9510
Precision: 0.8814
Recall: 0.9905
precision recall f1-score support
0 0.99 0.93 0.96 201
1 0.88 0.99 0.93 105
accuracy 0.95 306
macro avg 0.94 0.96 0.95 306
weighted avg 0.96 0.95 0.95 306
ROC AUC Score: 0.9755
Anomalies detected: 118
Best threshold: 302.4222 | F1 Score: 0.9364 | Precision: 0.8957 | Recall: 0.9810
Anomalies detected with best threshold: 115
-------------------------------------------------------------------------------------
Anomaly prediction completed.
Number of anomalies detected: 1 with threshold 98839.11998808103, std
Number of anomalies detected: 52 with threshold 155.31338082887692, mad
Number of anomalies detected: 9 with threshold 1397.6220121615902, percentile
Number of anomalies detected: 19 with threshold 604.7902062795376, IQR
Number of anomalies detected: 164 with threshold 0.0, zero
choosen threshold type: mad, with value: 155.3134
F1 Score: 0.8046
Accuracy: 0.8963
Precision: 0.6731
Recall: 1.0000
precision recall f1-score support
0 1.00 0.87 0.93 129
1 0.67 1.00 0.80 35
accuracy 0.90 164
macro avg 0.84 0.93 0.87 164
weighted avg 0.93 0.90 0.90 164
ROC AUC Score: 0.9754
c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\src\models_funtions.py:67: RuntimeWarning: invalid value encountered in divide c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\torch\nn\_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
Anomalies detected: 52
Best threshold: 238.2388 | F1 Score: 0.8974 | Precision: 0.8140 | Recall: 1.0000
Anomalies detected with best threshold: 43
-------------------------------------------------------------------------------------
Anomaly prediction completed.
Number of anomalies detected: 2 with threshold 154738.20813112837, std
Number of anomalies detected: 9 with threshold 1835.3532185194576, mad
Number of anomalies detected: 8 with threshold 1960.5651049060532, percentile
Number of anomalies detected: 3 with threshold 3109.410982117336, IQR
Number of anomalies detected: 141 with threshold 0.0, zero
choosen threshold type: mad, with value: 1835.3532
F1 Score: 0.1846
Accuracy: 0.6241
Precision: 0.6667
Recall: 0.1071
precision recall f1-score support
0 0.62 0.96 0.76 85
1 0.67 0.11 0.18 56
accuracy 0.62 141
macro avg 0.64 0.54 0.47 141
weighted avg 0.64 0.62 0.53 141
ROC AUC Score: 0.9200
c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\src\models_funtions.py:67: RuntimeWarning: invalid value encountered in divide c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\torch\nn\_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
Anomalies detected: 9 Best threshold: 772.2594 | F1 Score: 0.8730 | Precision: 0.7857 | Recall: 0.9821 Anomalies detected with best threshold: 70 -------------------------------------------------------------------------------------
c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\src\models_funtions.py:67: RuntimeWarning: invalid value encountered in divide
In [ ]:
# True collision zones against predicted anomalies, recordings 1 and 5 together
plot_anomalies_true_and_predicted(
    df_collisions_raw,
    df_collisions_raw_action,
    collisions_zones,
    df_test,
    title="Collisions zones vs predicted zones for both recordings",
)
In [ ]:
# True collision zones against predicted anomalies, recording 1 only
plot_anomalies_true_and_predicted(
    df_collisions_raw_1,
    df_collisions_raw_action_1,
    collisions_zones_1,
    df_test_1,
    title="Collisions zones vs predicted zones for recording 1",
)
In [ ]:
# True collision zones against predicted anomalies, recording 5 only
plot_anomalies_true_and_predicted(
    df_collisions_raw_5,
    df_collisions_raw_action_5,
    collisions_zones_5,
    df_test_5,
    title="Collisions zones vs predicted zones for recording 5",
)